별점분포의 mean, var, skewness 구하기 - expect_df

$\mu = E[X] = \sum x \cdot p(x)$

$\sigma^2 = E[(X - \mu)^2] = \sum (x - \mu)^2 p(x)$

$\frac{\mu_3}{\sigma^3} = \operatorname{E}\left[\left(\frac{X-\mu}{\sigma}\right)^3 \right]$



In [1]:

    
# Imported here so the notebook runs from a fresh kernel (Restart & Run All);
# the rest of the notebook uses both `pd` and `np`.
import numpy as np
import pandas as pd

# Rating-count table: each column label is a star value ('0.5' ... '5') and
# each cell is how many ratings of that value the row received.
df = pd.read_csv('../resource/preprocess_dist_df.csv')
# The column labels are the support of X; keep them as a float array so they
# can be used directly in the arithmetic below.
col = df.columns.astype(float).values
df.head()

별점분포의 X_count



In [2]:

    
# Total number of ratings per row. Only the rating columns ('0.5' .. '5') are
# summed, so re-running this cell does not fold an already-added 'sum' column
# into the total.
df['sum'] = df.loc[:, :'5'].sum(axis=1)
df.head()

$p(X)$



In [3]:

    
# Hand check of p(x = 0.5) for row 1: 1312 half-star ratings out of 58122 total.
1312 / 58122









    Out[3]:





0.022573208079556796



In [4]:

    
# p(x): divide every rating count by its row total so each row sums to 1.
# `.loc` replaces the removed `.ix` indexer; `div(..., axis=0)` lines the
# totals up with the rows instead of applying a lambda column by column.
prob_df = df.loc[:, :'5'].div(df['sum'], axis=0)
prob_df.head()

$x \cdot p(x)$



In [5]:

    
# Show the rating values x (the column labels converted to floats).
col









    Out[5]:





array([ 0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ])



In [6]:

    
# x * p(x): scale each probability column by the rating value it stands for
# (`col` holds one value per column, in column order).
Xprob_df = prob_df * col
Xprob_df.head()

mean : $\mu = E[X] = \sum x \cdot p(x)$



In [7]:

    
# Expected value per row: add up the x * p(x) terms across the rating columns.
mean = Xprob_df.sum(axis='columns')
mean.head()









    Out[7]:





0    4.171785
1    3.123774
2    4.035191
3    2.786390
4    3.384729
dtype: float64



In [8]:

    
# Store each row's mean next to its probabilities; the deviation step reads
# it back from this 'mean' column.
prob_df['mean'] = Xprob_df.sum(axis='columns')
prob_df.head()

std : $\sigma = \sqrt{\sigma^2},\quad \sigma^2 = E[(X - \mu)^2] = \sum (x - \mu)^2 p(x)$



In [9]:

    
# (x - mu): deviation of every rating value from each row's mean.
# Broadcasting a (1, n_ratings) row of values against an (n_rows, 1) column of
# means builds the whole table in one step, instead of appending one row at a
# time inside an iterrows() loop. Rows are labelled 0..n-1 and columns carry
# the float rating values, as before.
sub_df = pd.DataFrame(
    col[None, :] - prob_df['mean'].values[:, None],
    columns=col,
)
sub_df.head()



In [10]:

    
# (x - mu)^2: square the deviations with whole-frame arithmetic rather than a
# per-element lambda through the deprecated `applymap`.
sub2_df = sub_df * sub_df
sub2_df.head()



In [11]:

    
# Hand check of one variance term for row 0: (0.5 - mean)^2 * p(0.5).
13.482005*0.000537









    Out[11]:





0.007239836685



In [12]:

    
# var(X) = sum over x of (x - mu)^2 * p(x)
# The probabilities are passed as a plain array so the product is positional:
# sub2_df's columns are float labels (0.5, 1.0, ...) while prob_df's are the
# original string labels ('0.5', '1', ...), so label alignment cannot be relied
# on to pair them up. `.loc` replaces the removed `.ix` indexer.
var = sub2_df.mul(prob_df.loc[:, :'5'].values).sum(axis=1)
var.head()









    Out[12]:





0    0.391277
1    1.121530
2    0.802008
3    1.335124
4    1.033640
dtype: float64



In [13]:

    
# Hand check: standard deviation of row 1 from its variance (1.121530).
np.sqrt(1.121530)









    Out[13]:





1.0590231347803503



In [14]:

    
# sigma = sqrt(var), taken over the whole Series at once.
std = np.sqrt(var)
std.head()









    Out[14]:





0    0.625521
1    1.059023
2    0.895549
3    1.155476
4    1.016681
dtype: float64

skewness : $\frac{\mu_3}{\sigma^3} = \operatorname{E}\left[\left(\frac{X-\mu}{\sigma}\right)^3 \right]$



In [15]:

    
# (x - mu)^3: cube the deviations with whole-frame arithmetic rather than a
# per-element lambda through the deprecated `applymap`.
sub3_df = sub_df * sub_df * sub_df
sub3_df.head()



In [16]:

    
# sigma^3: cube of each row's standard deviation (the skewness denominator).
std3 = std ** 3
std3.head()









    Out[16]:





0    0.244752
1    1.187726
2    0.718237
3    1.542703
4    1.050882
dtype: float64



In [17]:

    
# ((x - mu) / sigma)^3, computed as (x - mu)^3 / sigma^3: every row of cubed
# deviations is divided by that row's sigma^3.
skew_df = sub3_df.div(std3, axis='index')
skew_df.head()









    Out[17]:






  
    
      
      0.5
      1.0
      1.5
      2.0
      2.5
      3.0
      3.5
      4.0
      4.5
      5.0
    
  
  
    
      0
      -202.257906
      -130.372202
      -77.925268
      -41.852776
      -19.090401
      -6.573817
      -1.238696
      -0.020712
      0.144460
      2.321149
    
    
      1
      -15.207680
      -8.065079
      -3.604628
      -1.194870
      -0.204346
      -0.001597
      0.044836
      0.566411
      2.194587
      5.560822
    
    
      2
      -61.513543
      -38.930515
      -22.686322
      -11.736741
      -5.037548
      -1.544519
      -0.213431
      -0.000061
      0.139816
      1.250422
    
    
      3
      -7.747626
      -3.695273
      -1.379862
      -0.315233
      -0.015226
      0.006318
      0.235560
      1.158658
      3.261774
      7.031066
    
    
      4
      -22.843405
      -12.905144
      -6.370778
      -2.526621
      -0.658986
      -0.054189
      0.001457
      0.221639
      1.320041
      4.010350



In [18]:

    
# skewness = sum over x of ((x - mu) / sigma)^3 * p(x)
# The probabilities are passed as a plain array so the product is positional:
# skew_df's columns are float labels while prob_df's are the original string
# labels, so label alignment cannot be relied on to pair them up. `.loc`
# replaces the removed `.ix` indexer.
skew = skew_df.mul(prob_df.loc[:, :'5'].values).sum(axis=1)
skew.head()









    Out[18]:





0   -1.002260
1   -0.347552
2   -1.052706
3   -0.127879
4   -0.452095
dtype: float64

expect_df



In [19]:

    
# Gather the three per-row statistics into one table; `keys` fixes the column
# order as mean, std, skew.
expect_df = pd.concat([mean, std, skew], axis=1, keys=['mean', 'std', 'skew'])
expect_df.head()



In [20]:

    
# Write the summary table out, leaving the row index out of the file.
expect_df.to_csv(
    path_or_buf='../resource/preprocess_expectation_df.csv',
    index=False,
)

	0.5	1	1.5	2	2.5	3	3.5	4	4.5	5
0	7	10	14	83	50	1472	454	4509	4318	2108
1	1312	2238	2150	6749	6597	9397	16842	1367	9011	2459
2	228	316	956	1513	2367	7526	9953	7289	21200	14948
3	3615	4063	5424	8133	11525	6501	17566	765	7099	2340
4	787	1612	1329	6635	5251	13675	15493	4620	14473	4299

	0.5	1	1.5	2	2.5	3	3.5	4	4.5	5	sum
0	7	10	14	83	50	1472	454	4509	4318	2108	13025
1	1312	2238	2150	6749	6597	9397	16842	1367	9011	2459	58122
2	228	316	956	1513	2367	7526	9953	7289	21200	14948	66296
3	3615	4063	5424	8133	11525	6501	17566	765	7099	2340	67031
4	787	1612	1329	6635	5251	13675	15493	4620	14473	4299	68174

	0.5	1	1.5	2	2.5	3	3.5	4	4.5	5
0	0.000537	0.000768	0.001075	0.006372	0.003839	0.113013	0.034856	0.346180	0.331516	0.161843
1	0.022573	0.038505	0.036991	0.116118	0.113503	0.161677	0.289770	0.023519	0.155036	0.042308
2	0.003439	0.004767	0.014420	0.022822	0.035704	0.113521	0.150130	0.109946	0.319778	0.225474
3	0.053930	0.060614	0.080918	0.121332	0.171935	0.096985	0.262058	0.011413	0.105906	0.034909
4	0.011544	0.023645	0.019494	0.097324	0.077023	0.200590	0.227257	0.067768	0.212295	0.063059

	0.5	1	1.5	2	2.5	3	3.5	4	4.5	5
0	0.000269	0.000768	0.001612	0.012745	0.009597	0.339040	0.121996	1.384722	1.491823	0.809213
1	0.011287	0.038505	0.055487	0.232236	0.283757	0.485031	1.014194	0.094078	0.697662	0.211538
2	0.001720	0.004767	0.021630	0.045644	0.089259	0.340564	0.525454	0.439785	1.439001	1.127368
3	0.026965	0.060614	0.121377	0.242664	0.429838	0.290955	0.917202	0.045651	0.476578	0.174546
4	0.005772	0.023645	0.029241	0.194649	0.192559	0.601769	0.795399	0.271071	0.955328	0.315296

	0.5	1	1.5	2	2.5	3	3.5	4	4.5	5	mean
0	0.000537	0.000768	0.001075	0.006372	0.003839	0.113013	0.034856	0.346180	0.331516	0.161843	4.171785
1	0.022573	0.038505	0.036991	0.116118	0.113503	0.161677	0.289770	0.023519	0.155036	0.042308	3.123774
2	0.003439	0.004767	0.014420	0.022822	0.035704	0.113521	0.150130	0.109946	0.319778	0.225474	4.035191
3	0.053930	0.060614	0.080918	0.121332	0.171935	0.096985	0.262058	0.011413	0.105906	0.034909	2.786390
4	0.011544	0.023645	0.019494	0.097324	0.077023	0.200590	0.227257	0.067768	0.212295	0.063059	3.384729

	0.5	1.0	1.5	2.0	2.5	3.0	3.5	4.0	4.5	5.0
0	-3.671785	-3.171785	-2.671785	-2.171785	-1.671785	-1.171785	-0.671785	-0.171785	0.328215	0.828215
1	-2.623774	-2.123774	-1.623774	-1.123774	-0.623774	-0.123774	0.376226	0.876226	1.376226	1.876226
2	-3.535191	-3.035191	-2.535191	-2.035191	-1.535191	-1.035191	-0.535191	-0.035191	0.464809	0.964809
3	-2.286390	-1.786390	-1.286390	-0.786390	-0.286390	0.213610	0.713610	1.213610	1.713610	2.213610
4	-2.884729	-2.384729	-1.884729	-1.384729	-0.884729	-0.384729	0.115271	0.615271	1.115271	1.615271

	0.5	1.0	1.5	2.0	2.5	3.0	3.5	4.0	4.5	5.0
0	13.482005	10.060220	7.138435	4.716650	2.794865	1.373080	0.451295	0.029510	0.107725	0.685940
1	6.884191	4.510417	2.636642	1.262868	0.389094	0.015320	0.141546	0.767772	1.893998	3.520224
2	12.497573	9.212382	6.427192	4.142001	2.356810	1.071620	0.286429	0.001238	0.216048	0.930857
3	5.227579	3.191189	1.654799	0.618409	0.082019	0.045629	0.509239	1.472850	2.936460	4.900070
4	8.321660	5.686931	3.552203	1.917474	0.782745	0.148016	0.013287	0.378559	1.243830	2.609101

	0.5	1.0	1.5	2.0	2.5	3.0	3.5	4.0	4.5	5.0
0	-49.503025	-31.908856	-19.072364	-10.243550	-4.672414	-1.608955	-0.303173	-0.005069	0.035357	0.568106
1	-18.062561	-9.579106	-4.281312	-1.419179	-0.242707	-0.001896	0.053253	0.672741	2.606569	6.604734
2	-44.181303	-27.961337	-16.294156	-8.429762	-3.618153	-1.109331	-0.153294	-0.000044	0.100421	0.898100
3	-11.952283	-5.700707	-2.128717	-0.486311	-0.023489	0.009747	0.363398	1.787465	5.031947	10.846844
4	-24.005733	-13.561789	-6.694938	-2.655181	-0.692517	-0.056946	0.001532	0.232916	1.387208	4.214406

	0.5	1.0	1.5	2.0	2.5	3.0	3.5	4.0	4.5	5.0
0	-202.257906	-130.372202	-77.925268	-41.852776	-19.090401	-6.573817	-1.238696	-0.020712	0.144460	2.321149
1	-15.207680	-8.065079	-3.604628	-1.194870	-0.204346	-0.001597	0.044836	0.566411	2.194587	5.560822
2	-61.513543	-38.930515	-22.686322	-11.736741	-5.037548	-1.544519	-0.213431	-0.000061	0.139816	1.250422
3	-7.747626	-3.695273	-1.379862	-0.315233	-0.015226	0.006318	0.235560	1.158658	3.261774	7.031066
4	-22.843405	-12.905144	-6.370778	-2.526621	-0.658986	-0.054189	0.001457	0.221639	1.320041	4.010350

	mean	std	skew
0	4.171785	0.625521	-1.002260
1	3.123774	1.059023	-0.347552
2	4.035191	0.895549	-1.052706
3	2.786390	1.155476	-0.127879
4	3.384729	1.016681	-0.452095